# This file contains the S-plus code that was used to implement the estimators and functionals in "Semiparametric Smoothing of Discrete Failure Time Data", Biometrical Journal, (54): 5-19, (2012) # As a fully working example the code herein can reproduce Figure 1 of the paper (the relevant data are also given below). # The same functions (with different input data) can be used in reproducing other figures of the same paper # More general use of the estimates is also feasible by supplying the corresponding data set. Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function. #N.B. Compatibility with R is not guaranteed as the functions were not written for use with R - however, with minimal modifications, their use in the R environment should be feasible. options(echo=FALSE) remove(objects()) ############################################################ # # obtain non-smooth (MLE) hazard rate estimate # ############################################################ lambdahat<-function(xin, cens, xout) #non smooth estimate of the discrete hazard function { n<-length(xout) mat<-sapply(1:n, function(i, xout, xin, cens) { xoutuse<-xout[i] whichtk<-which(xoutuse == xin) ## index of those T_j = t_k xinuse<-xin[whichtk] deltause<-cens[whichtk] num<- length(which(deltause != 0)) #equivalent to \sum_{i=1}^n I(T_i = t_k)\Delta_i den<-length(xinuse) c(num, den) }, xout, xin, cens) mat<-matrix(mat, ncol=2, byrow=T) numerator<-mat[,1] denuse<-mat[,2] m <- length(denuse) denominator<-sapply(1:m, function(j, denuse, m) sum(denuse[j:m]), denuse=denuse, m=m) #denominator: \sum_{j \geq k} \sum_{k=1}^n I(T_j=t_k) numerator/(denominator+1) } MLEHazardCovar<-function(xin, xinCov, cens, xout) #returns MLE (non smooth) HRE over covariate levels { xinCovDistinct <- unique(xinCov) n<-length(xinCovDistinct) result<-matrix(nrow=length(xout), ncol=n) for(i in 1:n) { whichvl<-which(xinCov== xinCovDistinct[i]) ## index of those V_j = u_l xinuse<-xin[whichvl] # select the T_j's for which I(V_j=v_l)=1 deltause<-cens[whichvl] # and the corresponding \Delta_j result[,i]<-lambdahat(xinuse, deltause, xout) # on the selected data apply the original lambdahat function } #cat(result) result #each column is a level of the covariate } ############################################################ # # end non-smooth (MLE) hazard rate estimate # Begin covariate matrix functions: Gamma (smoothing matrix) and power # ############################################################ power.matrix <- function(M, n){ #Raise matrix M to the n'th power powers <- base(n, 2) result <- diag( nrow(M) ) for(i in 1:length(powers)) { if(powers[i]) result <- result %*% M M <- M %*% M } result } base <- function(m, b){ #Express m as powers of b; m may be vector maxi <- floor(log(max(m))/log(b)) result <- matrix( 0, length(m), maxi+1 ) for(i in 1:(maxi+1)) { result[,i] <- m %% b m <- m %/% b } result } GammaMatrix1<-function(r) # Gamma (smoothing over covariate) matrix with constant trend. r = number of covariates. { Delta<- r/3# 1/(3*r) b1<- 1-Delta/r bk<- 1-2*Delta/r ak <- Delta/r Wmatrix<-diag(bk, nrow = r, ncol=r) Wmatrix[col(Wmatrix) - row(Wmatrix)==1]<- ak Wmatrix[ row(Wmatrix) - col(Wmatrix) ==1]<- ak Wmatrix[1,1]<-b1 Wmatrix[r,r]<-b1 Wmatrix } GammaMatrix2<-function(r) # Gamma (smoothing over covariate) matrix with linear trend. r = number of covariates. { den<- r*(r+1) Delta<- den/6 b1<- 1-2*Delta/den bk<- 1-4*Delta/den ak <- 2*Delta/den Wmatrix<-diag(bk, nrow = r, ncol=r) Wmatrix[col(Wmatrix) - row(Wmatrix)==1]<-ak Wmatrix[ row(Wmatrix) - col(Wmatrix) ==1]<- ak Wmatrix[1,1]<-b1 Wmatrix[r,r]<-b1 Wmatrix } ############################################################ # # end covariate (smoothing and power) functions # Begin smoothing of MLE estimate over covariate levels # ############################################################ SmoothCovEstimate<-function(xin, xinCov, cens, xout) { xinCovDistinct<-unique(xinCov) NoOfVovariates = length(xinCovDistinct) estimate<-MLEHazardCovar(xin, xinCov, cens, xout) #get MLE estimate. Matrix. Each column is a HRE for a covariate # cat("\n", is.matrix(estimate)) #estimate<-matrix( VehicleHazardRate(xout, "poisson", 1.6,0), nrow=length(xout), ncol=4) matrixuse<-GammaMatrix1(NoOfVovariates ) #calculate Gamma matrix #cat("\n", matrixuse) SmoothGammaMatrix<- power.matrix(matrixuse, 4) # get smoothing matrix - optimal power missing!!! #cat("\n", SmoothGammaMatrix) #cat(nrow(estimate), " ", ncol(estimate), " ", nrow(matrixuse), " ", ncol(matrixuse)) #exportData(data=estimate, file="file1.xls") smoothestimate<- estimate %*% SmoothGammaMatrix #exportData(data=smoothestimate, file="file2.xls") smoothestimate } ############################################################ # # end smoothing of MLE estimate over covariate # Begin smoothing over time # ############################################################ Tm<-function(tk, distribution, par1, par2) # This is used to calculate max(t_k; 1-\sum_{l=0}^k \eta_l > \epsilon), \epseilon>0 { SumDistrTk<-PdfSwitch(xout, distribution, par1, par2) n<-length(SumDistrTk) SumTK<-sapply(1:n, function(i, SumDistrTk, n) sum(SumDistrTk[i:n]), SumDistrTk=SumDistrTk, n=n) #\sum_{l=0}^n \eta_l Tmh<- 1-SumTK #1-\sum_{l=0}^n \eta_l maxTmh<-max(which(Tmh == max(Tmh)) )#Tmh[Tmh>= 0 & Tmh<= min(Tmh[Tmh>=0])] # max{1-\sum_{l=0}^n \eta_l >0} tk[maxTmh] # find the gridpoint tk which corresponds to Tmh, return the result. } CparamCalculation<-function(gamparam, VehHazard) # returns the C smoothing parameter calculated as # C= gamma/max_{k \geq 0} ( \lambda(t_{k-1}) + \lambda(t_k) + \lambda(t_{k+1}) ) { VehHazard<-c(0, VehHazard) # \lambda(t_{-1}) is set to 0 here n<-length(VehHazard)-2 Lsums<-sapply(1:n, function(i, VehHazard) sum(VehHazard[seq(i,i+2, by=1)]), VehHazard) #calculate \lambda(t_{k-1}) + \lambda(t_k) + \lambda(t_{k+1}) gamparam/max(Lsums) #return C } ################################################################################################################## ##############alternative DetermineSCprod<-function(NonParEst, VehHazard, Hvector, n, gammapar) function ######### DetermineSCprod<-function(NonParEst, VehHazard, Hvector, n, gammapar) #this finds SC = \gamma((n+1) \hat B_1)^{-1} \hat V_1 # n = number of obs, gammapar = sum of vehicle haz at xout (computed elsewhere) { VehHazard<-c(0, VehHazard) NonParEst<-c(0, NonParEst) n<-length(VehHazard)-2 B1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst) ((NonParEst[i] + NonParEst[i+2])* VehHazard[i+1] - (VehHazard[i] + VehHazard[i+2])* NonParEst[i+1] )^2 , VehHazard, NonParEst) #calculate (\hat \lambda(t_{k-1}) + \hat \lambda(t_{k+1})) \lambda(t_k) - ...)^2 B1hat<-sum(B1hatElements) V1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst) NonParEst[i+1]*(1-NonParEst[i+1]) *(VehHazard[i] + VehHazard[i+2])/Hvector[i] , VehHazard, NonParEst) V1hat<- sum(V1hatElements ) gammapar * V1hat / ((n+1)*B1hat) #estimated optimal product for S and C } ####################################################################################################################### DetermineSCprod<-function(NonParEst, VehHazard, Hvector, nn, gammapar) #this finds SC = \gamma((n+1) \hat B_1)^{-1} \hat V_1 # n = number of obs, gammapar = sum of vehicle haz at xout (computed elsewhere) { VehHazard<-c(0, VehHazard) NonParEst<-c(0, NonParEst) n<-length(VehHazard)-2 B1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst) ( sum(NonParEst[seq(i,i+2, by=2)])* VehHazard[(i+i+2)/2] - sum(VehHazard[seq(i,i+2, by=2)])* NonParEst[(i+i+2)/2])^2 , VehHazard, NonParEst) #calculate (\hat \lambda(t_{k-1}) + \hat \lambda(t_{k+1})) \lambda(t_k) - ...)^2 B1hat<-sum(B1hatElements) V1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst, Hvector){ NonParEst[(i+i+2)/2]*(1-NonParEst[(i+i+2)/2]) *sum(VehHazard[seq(i,i+2, by=2)])/Hvector[i]} , VehHazard, NonParEst, Hvector) V1hat<- sum(V1hatElements) (gammapar * V1hat) / ((nn+1)*B1hat) #estimated optimal product for S and C } SmoothedEstimate<-function(NonParEst, VehHazard, gammapar, SCproduct, Cpar) { n<-length(VehHazard)-2 # maybe length(VehHazard)-2 Sparam<- floor(SCproduct/Cpar) # S = S*C/C - this may need to change b0<- gammapar - Cpar*VehHazard[2] # b0 = \gamma - C \lambda_1 (counting starts at zero in the manuscript) bkplus1<- gammapar - Cpar * head(tail(VehHazard,2),1) # thisis the last value of the Bk vector (not specified in manuscript) ak <- Cpar * VehHazard bk <- sapply(1:n, function(i, VehHazard, Cpar, gammapar) { gammapar - Cpar * (VehHazard[i] + VehHazard[i+2])}, VehHazard, Cpar, gammapar) bk<- c(b0,bk, bkplus1) Wmatrix<-diag(bk) Wmatrix[col(Wmatrix) - row(Wmatrix)==1]<-head(ak, length(ak)-1) Wmatrix[ row(Wmatrix) - col(Wmatrix) ==1]<- tail(ak, length(ak)-1) LambdaMat<- (1/gammapar) * ( diag(1, nrow = length(VehHazard), ncol = length(VehHazard)) %*% Wmatrix) #################### test ################################# #cat("\n Vehicle Hazard = ", VehHazard, "\n LAMBDA MAT = ", NonParEst %*% power.matrix(LambdaMat,200), "\ndiag= ", diag(power.matrix(LambdaMat,200)), "\n") #aaaa<- NonParEst %*% power.matrix(LambdaMat,5) #plot(1:length(aaaa), VehHazard, type="l") #lines(1:length(aaaa), aaaa, lty=4) ########################################################### if(Sparam == 0) LambdaMatrixSpower <- diag(1, nrow=length(VehHazard), ncol= length(VehHazard)) else LambdaMatrixSpower <- power.matrix(LambdaMat, Sparam) SmoothEst<- matrix(NonParEst, nrow=1, ncol=length(VehHazard)) %*% LambdaMatrixSpower SmoothEst } VehicleHazardRate<-function(xout, vehicledistr, param1, param2) { switch(vehicledistr, binomial=dbinom(xout, param1, param2)/(1-pbinom(xout-1, param1, param2)) , geometric = dgeom(xout, param1)/(1-pgeom(xout-1, param1)) , poisson = dpois(xout, param1)/(1-ppois(xout-1, param1)) , negativebinomial = dnbinom(xout, param1, param2)/(1-pnbinom(xout-1, param1, param2)) ) } SemiparamEst<-function(xin, cens, xout, xinCov, vehicledistr, vdparam1, vdparam2=0.5) { #TkCutOff<-min(Tm(xout, Xdistr, Xpar1, Xpar2) , Tm(xout, Udistr, Upar1, Upar2), Tm(xout, vehicledistr, vdparam1, vdparam2)) # cat(xout, " ", TkCutOff) #xout<-head(xout, TkCutOff) ColOut=2*length(unique(xinCov)) result<-matrix(nrow=length(xout), ncol=ColOut/2) #Qvector <- 1-cumsum(PdfSwitch(xout-1, Xdistr, Xpar1, Xpar2)) #Pvector <- 1-cumsum(PdfSwitch(xout-1, Udistr, Upar1, Upar2)) #Hvector <- Pvector * Qvector NonparamEst<-SmoothCovEstimate(xin, xinCov, cens, xout) VehicleHazard<-rep(0.5, length=length(xout))#VehicleHazardRate(xout, vehicledistr, vdparam1, vdparam2) cat(VehicleHazard) GammaParam <- 2 #sum(VehicleHazard) Cpar <- 0.5# CparamCalculation(GammaParam , VehicleHazard) #upper bound for C 0.5# SCproduct<- 15* Cpar # DetermineSCprod(NonparamEst, VehicleHazard, Hvector , length(xin), GammaParam) #cat("\nColOut= ", ColOut) for(i in 1:ColOut/2) { SmoothEst<- SmoothedEstimate(NonparamEst[,i], VehicleHazard, GammaParam , SCproduct, Cpar ) #result[,2*i-1]<-xout result[,i]<-SmoothEst } result } RdistSwitch<-function(dist, SampleSize, par1, par2=.5) { switch(dist, binomial = rbinom(SampleSize, par1, par2) , geometric = rgeom(SampleSize, par1) , poisson = rpois(SampleSize, par1) , negativebinomial = rnbinom(SampleSize, par1, par2) , "uniform" = runif(SampleSize, par1, par2) ) } sterils<-importData("Prakash sterilis.xls") xin <-sterils$time.interval cens<-rep(1, times = length(xin)) xout<-seq(min(xin), max(xin), by=1) xinCov<-sterils$operator.negligence arg0<-MLEHazardCovar(xin, xinCov, cens, xout) arg2<-SemiparamEst(xin, cens, xout, xinCov, "geometric", .5, .5) par(mfrow=c(2,3)) plot(sort(xout), arg2[,1], type="l", xlab="x", ylab="hazard rate") plot(sort(xout), arg2[,2], type="l", xlab="x", ylab="hazard rate") plot(sort(xout), arg2[,3], type="l", xlab="x", ylab="hazard rate") col1<-c(arg2[,1], arg2[,2], arg2[,3]) col2<-rep(1:3, each=nrow(arg2)) col3<-rep(xout,3) #coll1<-c(VehicleHazardRate(xout, "poisson", 5,0), VehicleHazardRate(xout, "poisson", 5,0), VehicleHazardRate(xout, "poisson", 5,0), VehicleHazardRate(xout, "poisson", 5,0)) newmat<-data.frame("covariate level"=col2,time=col3,"hazard rate"=col1) guiPlot("3D Scatter", DataSetValues = newmat, NumConditioningVars=0)